Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

theme_set(theme_classic())

cell_type_name = params$cell_type_name
graph_weight = params$graph_weight

cell_type_name
## [1] "cd8"
graph_weight
## [1] "5.0"

Check enrichment of gene sets

Read in gene info and gene set assignments

file_tag = sprintf("%s_BL_%s", cell_type_name, graph_weight)

assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag), 
                     what = character(), sep="\n")

gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag), 
                 what = character(), sep="\n")

gene_sets = sapply(gene_sets, strsplit, USE.NAMES=FALSE, split=",")
n_genes   = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   21.00   22.00   20.98   22.00   23.00
length(n_genes)
## [1] 40
sort(n_genes)
##  [1]  2 18 19 19 20 20 20 21 21 21 21 21 21 21 21 21 21 21 22 22 22 22 22 22 22
## [26] 22 22 22 22 22 22 22 22 22 23 23 23 23 23 23

Find gene symbols

Find gene symbols from bioMart.

All the gene symbols that can be found in bioMart are consistent with what we have. So no need to run it.

ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

table(assayed_genes %in% gene_BM$external_gene_name)

t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  a2s[i] = ai[1]
}
## [1] "QARS"
## [1] "EPRS1" "QARS1"
## [1] "SEPT2"
## [1] "SEPTIN6" "SEPTIN2"
table(is.na(a2s))
## 
## FALSE  TRUE 
##  1607    42
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    42  1565    42
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

gene_info[sym_in_data != sym_limma,]
##     sym_in_data sym_limma
##  1:    C10orf91 LINC02870
##  2:    C12orf10      MYG1
##  3:    C12orf45  NOPCHAP1
##  4:     C6orf48    SNHG32
##  5:     C6orf99 LINC02901
##  6:    CXorf40A     EOLA1
##  7:     CXorf57      RADX
##  8:     FAM102A     EEIG1
##  9:     FAM173A    ANTKMT
## 10:     FAM213B    PRXL2B
## 11:       H2AFX      H2AX
## 12:   HIST1H2AG    H2AC11
## 13:   HIST1H2BK    H2BC12
## 14:   HIST1H2BN    H2BC15
## 15:    HIST1H3A      H3C1
## 16:    HIST1H3H     H3C10
## 17:    HIST1H4C      H4C3
## 18:   HIST2H2BF    H2BC18
## 19:    KIAA0391     PRORP
## 20:        QARS     EPRS1
## 21:       SEPT6   SEPTIN6
## 22:       ARNTL     BMAL1
## 23:    C12orf65     MTRFR
## 24:    C16orf72   HAPSTR1
## 25:      CCDC84   CENATAC
## 26:      DOPEY2     DOP1B
## 27:     FAM126B     HYCC2
## 28:    FAM160B1    FHIP2A
## 29:        H1FX     H1-10
## 30:       H2AFJ      H2AJ
## 31:       HEXDC      HEXD
## 32:    HIST1H1C      H1-2
## 33:    HIST1H1D      H1-3
## 34:    HIST1H1E      H1-4
## 35:    KIAA1109     BLTP1
## 36:    KIAA1551     RESF1
## 37:        MKL1     MRTFA
## 38:       NARFL     CIAO3
## 39:       SEPT2   SEPTIN6
## 40:      TARSL2     TARS3
## 41:      TMEM8A     PGAP6
## 42:       WDR60   DYNC2I1
##     sym_in_data sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]

dim(gene_info)
## [1] 1649    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:      ABLIM1    ABLIM1      ABLIM1
## 2:  AC004687.1      <NA>  AC004687.1
## 3:  AC004854.2      <NA>  AC004854.2
## 4:  AC007384.1      <NA>  AC007384.1
## 5:  AC007952.4      <NA>  AC007952.4
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1    2 
## 1647    1
gene_info[gene_symbol %in% names(t1)[t1 == 2],]
##    sym_in_data sym_limma gene_symbol
## 1:       SEPT6   SEPTIN6     SEPTIN6
## 2:       SEPT2   SEPTIN6     SEPTIN6
gene_info[sym_in_data == "HIST1H2BC", gene_symbol:="H2BC4"]
gene_info[sym_in_data == "SEPT6", gene_symbol:="SEPTIN6"]
gene_info[sym_in_data == "SEPT2", gene_symbol:="SEPTIN2"]

Prepare gene set information

Gene set annotations (by gene symbols) were downloaded from MSigDB website.

gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]]    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
gmtfile[["immune"]]   = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"

pathways = list()
for(k1 in names(gmtfile)){
  pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}

names(pathways)
## [1] "reactome" "go_bp"    "immune"
sapply(pathways, length)
## reactome    go_bp   immune 
##     1692     7647     5219

Filter gene sets for size between 10 and 500.

lapply(pathways, function(v){
  quantile(sapply(v, length), probs = seq(0, 1, 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0 
## 
## $immune
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    5  162  193  197  199  199  200  200  200  200 1992
for(k1 in names(pathways)){
  p1 = pathways[[k1]]
  pathways[[k1]] = p1[sapply(p1, length) %in% 10:500]
}

Conduct enrichment analysis

dim(gene_info)
## [1] 1649    3
max_n2kp = 10

goseq_res = NULL

for(k in 1:length(gene_sets)){
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  table(genes)
  
  pwf = nullp(genes, "hg38", "geneSymbol")

  for(k1 in names(pathways)){
    p1 = pathways[[k1]]
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=goseq:::reversemapping(p1))
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    nD = sum(res1$FDR < 0.1)
    
    if(nD > 0){
      res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "CD83"   "RHOC"   "RPL41"  "TLE4"   "ASCL2"  "CD226"  "GNLY"   "MKLN1" 
##  [9] "PLAC8"  "PRR5L"  "PTPN4"  "SPN"    "THADA"  "TMSB10" "TMSB4X" "UVRAG" 
## [17] "VPS18"  "VPS39"  "XPO6"   "YPEL5"  "ZZEF1"

## [1] 2
##  [1] "DTHD1"   "GSTM1"   "NT5DC1"  "SLC27A5" "SLC4A4"  "TRAV8-4" "ZNF862" 
##  [8] "ADCY7"   "ARAP2"   "BICRAL"  "DIAPH2"  "DMTF1"   "GPR141"  "MCTP2"  
## [15] "MIAT"    "MTERF2"  "MYBL1"   "NFATC3"  "NLRC3"   "TUT7"    "UNC13D" 
## [22] "XIST"    "ZNF83"

## [1] 3
##  [1] "APMAP"     "CD84"      "CMC1"      "FAM118A"   "FCER1G"    "ICAM3"    
##  [7] "KLRF1"     "KLRG1"     "LEPROTL1"  "LETMD1"    "LRRC23"    "NCR1"     
## [13] "RTN3"      "SH2D1A"    "THAP9-AS1" "TMEM107"   "TMEM42"    "MCOLN2"   
## [19] "PATL2"     "PCSK7"     "RASA3"     "SLFN5"

## [1] 4
##  [1] "ARHGAP9"  "CYB5D2"   "HLA-DMB"  "INTS8"    "KLRB1"    "MZT2A"   
##  [7] "PDE3B"    "RCSD1"    "ABCC1"    "AHCTF1"   "DOPEY2"   "ENTPD4"  
## [13] "HLA-DQA1" "KLRD1"    "LPCAT1"   "PNPLA6"   "PTPN7"    "SLC38A10"
## [19] "ST6GAL1"  "TAP2"     "UGGT1"

## [1] 5
##  [1] "ACTR1B"   "ATP5MG"   "COQ7"     "CPNE1"    "FAM102A"  "GLRX5"   
##  [7] "MARCKSL1" "MPST"     "NMT2"     "RACK1"    "UIMC1"    "WDR86"   
## [13] "DNAJB14"  "GDPD5"    "HERC3"    "HERC6"    "KIAA0232" "NUP210"  
## [19] "R3HCC1L"  "ZNF335"   "ZNF800"

## [1] 6
##  [1] "AC245297.3"  "AL139246.5"  "BBS9"        "CRTAM"       "CXorf57"    
##  [6] "EOMES"       "HIST1H3H"    "MFNG"        "NUAK2"       "PITPNC1"    
## [11] "TRAV12-2"    "TRAV38-2DV8" "TRBV3-1"     "TRBV6-2"     "TRBV6-5"    
## [16] "CYTOR"       "FAM126B"     "GK5"         "GPR174"      "NRDC"       
## [21] "OGA"         "PARP15"      "PIK3R5"

## [1] 7
##  [1] "NUCB2"    "PAIP2"    "PDLIM1"   "PTGER4"   "RPL22L1"  "STMN3"   
##  [7] "CWC25"    "DENND4C"  "GIGYF1"   "GPANK1"   "HELZ"     "KIF13B"  
## [13] "L3HYPDH"  "MAN2C1"   "RNMT"     "SCAF8"    "SECISBP2" "TIMP1"   
## [19] "ZCCHC2"

## [1] 8
##  [1] "C10orf91" "ANKRD36B" "ANKRD36C" "C16orf72" "CD46"     "CHD9"    
##  [7] "CROCC"    "DGKD"     "EML4"     "MIGA1"    "RNF19A"   "RPS6KA5" 
## [13] "RSAD2"    "RUBCN"    "STAT4"    "STK10"    "SYNE1"    "UBE2H"   
## [19] "UBR2"     "VPS13C"   "ZNF493"   "ZNF708"

## [1] 9
##  [1] "ALOX5AP" "HDHD3"   "KLRK1"   "MATK"    "TMEM134" "ANKRD12" "BTBD9"  
##  [8] "CST7"    "GZMA"    "GZMB"    "IRF9"    "KLF3"    "LILRB1"  "MYO1F"  
## [15] "NKG7"    "RNF125"  "SPON2"   "SRGN"    "TMEM181" "UCP2"    "ZFAND3"

## [1] 10
##  [1] "AC008555.5" "AC044849.1" "AC083798.2" "AL135791.1" "COQ8A"     
##  [6] "FAM213B"    "HIKESHI"    "MTRNR2L8"   "NBPF14"     "PCMTD2"    
## [11] "PRAG1"      "RGL4"       "TOX"        "TRAV13-1"   "TRAV14DV4" 
## [16] "TRAV5"      "TRBV7-2"    "TRBV7-9"    "TRGV5"      "TRGV7"     
## [21] "WARS2"      "ZNF749"

## [1] 12
##  [1] "BTG1"     "HIST1H4C" "ID2"      "JADE1"    "LDLRAP1"  "MSC"     
##  [7] "PNRC1"    "WDR54"    "ZNF575"   "ARRDC3"   "BRPF1"    "COX19"   
## [13] "GCN1"     "GON4L"    "KAT6B"    "MAPK8IP3" "PHF14"    "PKD1"    
## [19] "RORA"     "USP16"    "ZNF292"

## [1] 13
##  [1] "AC004687.1" "AC004854.2" "AC015982.1" "AC020911.2" "AC083880.1"
##  [6] "AC091271.1" "AC103591.3" "AF213884.3" "AL357060.1" "AL451085.1"
## [11] "AL627171.1" "ARF4-AS1"   "ATP2B1-AS1" "C6orf99"    "HELQ"      
## [16] "HIPK1-AS1"  "KCNQ1OT1"   "LINC01465"  "MZF1-AS1"   "OSER1-DT"  
## [21] "PGGHG"      "NEK9"

## [1] 14
##  [1] "AOAH"      "CCL4L2"    "TRAV3"     "TRG-AS1"   "TRGV8"     "TRGV9"    
##  [7] "CARD11"    "CARD16"    "EPSTI1"    "FCRL6"     "FGL2"      "ITM2A"    
## [13] "LINC02384" "NECAP1"    "PTPRJ"     "TRAV9-2"   "TRDC"      "TRDV1"    
## [19] "TRGC2"     "TRGV4"     "TTC38"     "XCL2"

## [1] 15
##  [1] "ASAH1"    "COMMD6"   "COTL1"    "ITGB1BP1" "LAPTM5"   "NSUN6"   
##  [7] "PTPRCAP"  "TPGS2"    "CCNH"     "CD2"      "CD52"     "CD8A"    
## [13] "CD8B"     "ITK"      "LY6E"     "PRSS23"   "RHOH"     "SBK1"    
## [19] "SLC35F5"  "TRAC"     "TRBC2"    "ZDHHC20"

## [1] 16
##  [1] "TPRKB"   "BDP1"    "CHD1"    "DHX29"   "HELZ2"   "HEXDC"   "IFI44"  
##  [8] "IFIT2"   "IFIT3"   "KLHDC4"  "MSI2"    "MX1"     "NEMF"    "OAS3"   
## [15] "PHACTR4" "PLA2G6"  "REXO1"   "RNF157"  "RNPC3"   "SPOCK2"  "VAV3"   
## [22] "ZC3H7B"

## [1] 17
##  [1] "IL6R"       "SERINC5"    "TRBV6-1"    "ABCA5"      "ABR"       
##  [6] "AC020659.1" "CCDC84"     "CPPED1"     "DDX3Y"      "EIF1AY"    
## [11] "ENOSF1"     "IL6ST"      "KDM5D"      "MICAL2"     "OSM"       
## [16] "RPS4Y1"     "SBNO2"      "SIDT1"      "TRBV4-2"    "ZMIZ2"     
## [21] "ZNF236"     "ZNF652"

## [1] 18
##  [1] "ARHGAP45"    "CEMIP2"      "CRYBG1"      "FAM133B"     "MX2"        
##  [6] "OAS2"        "PIK3CD"      "PPP4R3B"     "PREX1"       "PUM3"       
## [11] "RIPOR2"      "S100A12"     "SAMD9L"      "TENT5C"      "THUMPD3-AS1"
## [16] "TMEM131L"    "TRANK1"      "TRAV19"      "TRAV27"      "TRBV11-2"   
## [21] "TRBV2"       "TUT4"

## [1] 19
##  [1] "AMD1"     "CD27"     "CREBL2"   "CYB561A3" "EI24"     "GIMAP1"  
##  [7] "GLTP"     "IER3"     "IFRD1"    "KIR3DL2"  "LIME1"    "MZF1"    
## [13] "NELL2"    "PDE7A"    "PPP1R15B" "SLC38A1"  "TMEM204"  "TSPYL4"  
## [19] "LTBP4"    "TRIM38"   "VPS13A"   "ZBTB40"

## [1] 20
##  [1] "RGCC"     "ACOX1"    "BTN3A1"   "CAST"     "CCDC88C"  "CELF2"   
##  [7] "DDHD1"    "KDM5A"    "MTMR6"    "NCKAP1L"  "NLRC5"    "PCYT1A"  
## [13] "PRKCH"    "RAP1GAP2" "RASGRP1"  "SPG11"    "SYNRG"    "TAOK1"   
## [19] "TAOK3"    "TIPARP"   "ZEB2"

## [1] 21
##  [1] "ID1"        "RGS1"       "AC092683.1" "AP005482.1" "ATAD2B"    
##  [6] "BMT2"       "DOCK10"     "ETFDH"      "FAM78A"     "GABPB2"    
## [11] "GRK2"       "HRH2"       "IGKV3-15"   "LINC02256"  "MARF1"     
## [16] "MINDY2"     "NBEAL2"     "POLR2J3-1"  "SEC14L1"    "THAP5"     
## [21] "Z93930.2"   "ZNF808"

## [1] 22
##  [1] "ARL4A"    "ABCA7"    "CAPNS1"   "ELMO1"    "ERICH1"   "FAM53B"  
##  [7] "FNBP1"    "HSH2D"    "IFI44L"   "IL2RG"    "IRAK4"    "KCNAB2"  
## [13] "KIAA1551" "KIAA2026" "KLF6"     "PNPLA8"   "PRDM2"    "SENP7"   
## [19] "TTC14"    "WASL"     "XAF1"

## [1] 23
##  [1] "AC012645.3" "AC027644.3" "AC087623.3" "AK5"        "AL118516.1"
##  [6] "BX284668.6" "CHRM3-AS2"  "FXYD7"      "IGLV1-44"   "INTS6L"    
## [11] "KMT2E-AS1"  "LINC00402"  "LINC00649"  "LST1"       "MAP3K2"    
## [16] "MATR3-1"    "PDCD4-AS1"  "RETREG1"    "SNHG15"     "TRAV21"    
## [21] "TRAV8-2"    "TRAV8-3"

## [1] 24
##  [1] "C12orf45" "CAMK4"    "CRLF3"    "GCH1"     "PLK2"     "ZNF10"   
##  [7] "ANKRD44"  "EHMT1"    "FRYL"     "MBD5"     "PIGF"     "POLH"    
## [13] "PPP6R2"   "PRR14L"   "RLF"      "SETD2"    "SETX"     "SP3"     
## [19] "USP34"    "WDR7"     "ZNF407"

## [1] 25
##  [1] "AL136454.1" "ATP5F1A"    "CCNI"       "COA1"       "EFCAB2"    
##  [6] "FAM173A"    "FCMR"       "FCRL3"      "GCSAM"      "GTF3A"     
## [11] "ITGAE"      "RTRAF"      "TRBC1"      "ADGRE5"     "ARHGAP30"  
## [16] "CD38"       "CTSW"       "IFITM2"     "MT2A"       "MYO1G"     
## [21] "SLA2"       "ZNF683"

## [1] 26
##  [1] "AIF1"     "CD28"     "YPEL3"    "ANKRD49"  "CASP10"   "CFLAR"   
##  [7] "ETNK1"    "FNDC3B"   "GCA"      "GPATCH2L" "H6PD"     "IGHA1"   
## [13] "LSS"      "NCOA7"    "PHF11"    "PRKX"     "PTGDS"    "SETD5"   
## [19] "WDTC1"

## [1] 27
##  [1] "GNAQ"       "NXT2"       "AC016831.7" "ADHFE1"     "AKNA"      
##  [6] "APOL6"      "ARL4C"      "ELMOD3"     "GBP1"       "GBP5"      
## [11] "GNPTAB"     "GPR132"     "IFI27"      "INPP4A"     "ISG20"     
## [16] "PDZD4"      "PPP1R16B"   "RAB27B"     "SLFN12L"    "SYTL3"     
## [21] "TTC16"

## [1] 28
##  [1] "BTG2"     "C12orf10" "C12orf57" "CCNL1"    "GSTM4"    "KCTD7"   
##  [7] "MAP3K8"   "MAPRE2"   "NELFCD"   "NEU1"     "PTRHD1"   "RPS26"   
## [13] "RSRP1"    "SEPT6"    "SMDT1"    "TGIF1"    "ZFP36L1"  "ACAP3"   
## [19] "PARP4"    "TEP1"

## [1] 29
##  [1] "AC016405.3" "AC087239.1" "AC245014.3" "ANXA2R"     "CITED2"    
##  [6] "CSKMT"      "CXorf40A"   "IER5"       "IGKV3-20"   "ILF3-DT"   
## [11] "NOCT"       "PIK3IP1"    "SDR42E2"    "SLC2A3"     "SLC38A2"   
## [16] "STK17A"     "TC2N"       "TRBV28"     "WSB1"       "Z93241.1"  
## [21] "ZFAS1"      "MALAT1"

## [1] 30
##  [1] "BACH2"  "CSRNP1" "EGR1"   "EPS8"   "FOSB"   "MYLIP"  "NOSIP"  "NR1D1" 
##  [9] "NR4A2"  "NR4A3"  "RCAN3"  "SELL"   "SNHG12" "TCF7"   "TXK"    "CYTH1" 
## [17] "DUS1L"  "JUND"   "MIDN"   "SCRN3"  "VCAN"

## [1] 31
##  [1] "ASL"     "CLDND1"  "CXXC5"   "GATA3"   "PAPSS1"  "RAB33B"  "SESN1"  
##  [8] "SESN2"   "TBCCD1"  "TIGIT"   "CSNK1G2" "ERAP2"   "GALNT3"  "ITGAM"  
## [15] "KIR2DL3" "PYROXD1" "RAPGEF1" "RUFY2"   "SZT2"    "TBX21"   "VPS13B" 
## [22] "YPEL1"   "ZDHHC5"

## [1] 32
##  [1] "NT5E"    "TBC1D17" "AFF1"    "AFF4"    "ARHGAP4" "BAZ2A"   "CEP164" 
##  [8] "CRNKL1"  "LRRFIP1" "LUC7L3"  "NKTR"    "PNN"     "PPIG"    "PPIL2"  
## [15] "SRSF10"  "SRSF11"  "WDR60"   "YTHDC1"

## [1] 33
##  [1] "ZC3H12A" "B4GALT1" "BCL9L"   "C2CD3"   "CAPN15"  "EHBP1L1" "LONP2"  
##  [8] "LPIN1"   "NAA25"   "NFKBIZ"  "PDE4B"   "PDE4D"   "PIM1"    "RALGAPB"
## [15] "RIC3"    "STK17B"  "TBC1D2B" "TOB1"    "UTY"     "ZBTB20"

## [1] 34
##  [1] "GALNT11"  "ADGRG1"   "ARHGAP10" "CCL4"     "CES1"     "CX3CR1"  
##  [7] "ERBIN"    "FGFBP2"   "GALNT10"  "GALNT2"   "ITGAL"    "LAG3"    
## [13] "MPPE1"    "MYO9B"    "PEX1"     "PEX26"    "PLEK"     "SEMA4D"  
## [19] "ST8SIA4"  "SUSD1"    "TMEM8A"

## [1] 35
##  [1] "HMBOX1"  "KIF9"    "ANKRD36" "AP3B1"   "AP3M2"   "ARAP1"   "ARID5B" 
##  [8] "ATG2A"   "ATG2B"   "CLEC16A" "CREBZF"  "FAM13B"  "GPRIN3"  "HECTD4" 
## [15] "INO80"   "INO80D"  "INPP5D"  "KLF2"    "MKL1"    "NARFL"   "NFE2L1" 
## [22] "PIP4K2B" "ZNF557"

## [1] 36
##  [1] "AC025164.1" "AL121944.1" "AL138963.3" "ARMH1"      "C6orf48"   
##  [6] "CCR7"       "INPP4B"     "JAML"       "LINC02273"  "LRRN3"     
## [11] "LTB"        "NPIPB4"     "NUP58"      "TCEA3"      "TMIGD2"    
## [16] "TNFRSF25"   "TRABD2A"    "TRAV1-2"    "TRBV9"      "OXNAD1"    
## [21] "PARP11"     "PCED1B"

## [1] 37
##  [1] "AC007952.4"   "AC119396.1"   "ALKBH7"       "BNIP3L"       "CD40LG"      
##  [6] "CMTM7"        "DYRK4"        "EPB41L4A-AS1" "EPHX2"        "GADD45B"     
## [11] "GZMK"         "HIBADH"       "MCUB"         "NOP53"        "RGS10"       
## [16] "SNHG8"        "TCP11L2"      "TESPA1"       "TRBV20-1"     "ZFAND1"      
## [21] "ITPR2"        "LINC02446"

## [1] 38
##  [1] "CST3"     "MAN2B1"   "SAE1"     "TRAT1"    "ATP2B4"   "CCDC112" 
##  [7] "CMKLR1"   "CTSC"     "DOCK11"   "FAM169A"  "LNPEP"    "LRRC8A"  
## [13] "PARP14"   "RAB3GAP2" "SLC4A7"   "SLK"      "TMEM127"  "TTC17"   
## [19] "VPS13D"   "ZNF276"

## [1] 39
##  [1] "ABCC10"     "ARHGEF3"    "COL6A2"     "COL6A3"     "DDX60L"    
##  [6] "DENND4B"    "EIF4E3"     "HECA"       "HPS4"       "LAIR2"     
## [11] "N4BP1"      "NUTM2B-AS1" "ODF3B"      "SUSD6"      "TRAV12-3"  
## [16] "TRAV17"     "TRAV4"      "TRBV7-6"    "TRGV10"     "TSPAN32"   
## [21] "TTTY15"     "XCL1"       "ZBP1"

## [1] 40
##  [1] "AC007384.1"  "AC025171.3"  "ARRDC2"      "NT5C3B"      "PRR7"       
##  [6] "SNHG9"       "AC116407.2"  "ARHGEF9"     "BROX"        "CHD6"       
## [11] "DDIT4"       "GPHN"        "HIPK1"       "IQCG"        "KIAA1109"   
## [16] "PCNX1"       "PSMA3-AS1"   "SLC16A1-AS1" "SLCO3A1"     "SLF2"       
## [21] "SPATA13"     "TARSL2"      "VTI1A"

for(n1 in names(goseq_res)){
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_1"
##  [1] "CD83"   "RHOC"   "RPL41"  "TLE4"   "ASCL2"  "CD226"  "GNLY"   "MKLN1" 
##  [9] "PLAC8"  "PRR5L"  "PTPN4"  "SPN"    "THADA"  "TMSB10" "TMSB4X" "UVRAG" 
## [17] "VPS18"  "VPS39"  "XPO6"   "YPEL5"  "ZZEF1" 
## $reactome
##                           category over_represented_pvalue
## 895 sars cov 2 modulates autophagy            2.230272e-06
##     under_represented_pvalue numDEInCat numInCat         FDR
## 895                        1          3        3 0.002593807
## 
## $go_bp
##                    category over_represented_pvalue under_represented_pvalue
## 4354 snare complex assembly             2.79997e-06                        1
##      numDEInCat numInCat        FDR
## 4354          3        3 0.01313466
## 
## [1] "set_3"
##  [1] "APMAP"     "CD84"      "CMC1"      "FAM118A"   "FCER1G"    "ICAM3"    
##  [7] "KLRF1"     "KLRG1"     "LEPROTL1"  "LETMD1"    "LRRC23"    "NCR1"     
## [13] "RTN3"      "SH2D1A"    "THAP9-AS1" "TMEM107"   "TMEM42"    "MCOLN2"   
## [19] "PATL2"     "PCSK7"     "RASA3"     "SLFN5"    
## $reactome
##                                                                     category
## 435 immunoregulatory interactions between a lymphoid and a non lymphoid cell
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 435            8.235374e-06                0.9999998          5       34
##            FDR
## 435 0.00957774
## 
## [1] "set_9"
##  [1] "ALOX5AP" "HDHD3"   "KLRK1"   "MATK"    "TMEM134" "ANKRD12" "BTBD9"  
##  [8] "CST7"    "GZMA"    "GZMB"    "IRF9"    "KLF3"    "LILRB1"  "MYO1F"  
## [15] "NKG7"    "RNF125"  "SPON2"   "SRGN"    "TMEM181" "UCP2"    "ZFAND3" 
## $immune
##                                                                               category
## 4250                           gse45739 unstim vs acd3 acd28 stim nras ko cd4 tcell dn
## 4981 kazmin pbmc p falciparum rtss as01 age unknown correlated with protection 56dy ne
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4250            3.623625e-06                0.9999998          7       54
## 4981            6.626185e-06                0.9999998          5       22
##             FDR
## 4250 0.01689015
## 4981 0.01689015
## 
## [1] "set_15"
##  [1] "ASAH1"    "COMMD6"   "COTL1"    "ITGB1BP1" "LAPTM5"   "NSUN6"   
##  [7] "PTPRCAP"  "TPGS2"    "CCNH"     "CD2"      "CD52"     "CD8A"    
## [13] "CD8B"     "ITK"      "LY6E"     "PRSS23"   "RHOH"     "SBK1"    
## [19] "SLC35F5"  "TRAC"     "TRBC2"    "ZDHHC20" 
## $go_bp
##                               category over_represented_pvalue
## 4477 t cell receptor signaling pathway            1.445094e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 4477                0.9999993          6       43 0.06778935
## 
## [1] "set_16"
##  [1] "TPRKB"   "BDP1"    "CHD1"    "DHX29"   "HELZ2"   "HEXDC"   "IFI44"  
##  [8] "IFIT2"   "IFIT3"   "KLHDC4"  "MSI2"    "MX1"     "NEMF"    "OAS3"   
## [15] "PHACTR4" "PLA2G6"  "REXO1"   "RNF157"  "RNPC3"   "SPOCK2"  "VAV3"   
## [22] "ZC3H7B" 
## $immune
##                                                                           category
## 1342                                  gse18791 unstim vs newcatsle virus dc 10h dn
## 5074                                     sobolev pbmc pandemrix age 18 64yo 7dy dn
## 1368                                         gse18893 tconv vs treg 24h culture dn
## 4081                                        gse42724 naive bcell vs plasmablast up
## 1338                                     gse18791 ctrl vs newcastle virus dc 6h dn
## 5055                                 querec pbmc yf 17d vaccine age 18 45yo 3dy up
## 5056                                 querec pbmc yf 17d vaccine age 18 45yo 7dy up
## 1453 gse19888 adenosine a3r inh vs act with inhibitor pretreatment in mast cell up
## 10                erwin cohen blood tc 83 age 23 48yo vaccinated vs control 2dy up
## 4874                           gse9960 gram neg vs gram neg and pos sepsis pbmc up
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1342            3.851459e-07                1.0000000          8       55
## 5074            7.535735e-07                1.0000000          5       14
## 1368            3.311437e-06                0.9999999          6       34
## 4081            6.338755e-06                0.9999996          7       55
## 1338            1.595504e-05                0.9999989          7       63
## 5055            1.606477e-05                0.9999994          5       24
## 5056            1.606477e-05                0.9999994          5       24
## 1453            2.393285e-05                0.9999986          6       44
## 10              2.771849e-05                0.9999983          6       45
## 4874            3.128033e-05                0.9999986          5       28
##              FDR
## 1342 0.001920859
## 5074 0.001920859
## 1368 0.005627235
## 4081 0.008078743
## 1338 0.011699744
## 5055 0.011699744
## 5056 0.011699744
## 1453 0.015251207
## 10   0.015700984
## 4874 0.015946710
## 
## [1] "set_17"
##  [1] "IL6R"       "SERINC5"    "TRBV6-1"    "ABCA5"      "ABR"       
##  [6] "AC020659.1" "CCDC84"     "CPPED1"     "DDX3Y"      "EIF1AY"    
## [11] "ENOSF1"     "IL6ST"      "KDM5D"      "MICAL2"     "OSM"       
## [16] "RPS4Y1"     "SBNO2"      "SIDT1"      "TRBV4-2"    "ZMIZ2"     
## [21] "ZNF236"     "ZNF652"    
## $reactome
##                           category over_represented_pvalue
## 478 interleukin 6 family signaling            2.671849e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 478                0.9999998          3        8 0.03107361
## 
## $go_bp
##                       category over_represented_pvalue under_represented_pvalue
## 4122 response to interleukin 6            1.242271e-05                0.9999999
##      numDEInCat numInCat        FDR
## 4122          3        5 0.05827493
## 
## [1] "set_18"
##  [1] "ARHGAP45"    "CEMIP2"      "CRYBG1"      "FAM133B"     "MX2"        
##  [6] "OAS2"        "PIK3CD"      "PPP4R3B"     "PREX1"       "PUM3"       
## [11] "RIPOR2"      "S100A12"     "SAMD9L"      "TENT5C"      "THUMPD3-AS1"
## [16] "TMEM131L"    "TRANK1"      "TRAV19"      "TRAV27"      "TRBV11-2"   
## [21] "TRBV2"       "TUT4"       
## $go_bp
##                    category over_represented_pvalue under_represented_pvalue
## 2154  neutrophil chemotaxis            1.564504e-05                0.9999997
## 2160   neutrophil migration            2.912363e-05                0.9999993
## 1047 granulocyte chemotaxis            3.103883e-05                0.9999992
## 1050  granulocyte migration            5.931136e-05                0.9999983
##      numDEInCat numInCat        FDR
## 2154          4       19 0.04853439
## 2160          4       22 0.04853439
## 1047          4       22 0.04853439
## 1050          4       26 0.06955740
## 
## [1] "set_19"
##  [1] "AMD1"     "CD27"     "CREBL2"   "CYB561A3" "EI24"     "GIMAP1"  
##  [7] "GLTP"     "IER3"     "IFRD1"    "KIR3DL2"  "LIME1"    "MZF1"    
## [13] "NELL2"    "PDE7A"    "PPP1R15B" "SLC38A1"  "TMEM204"  "TSPYL4"  
## [19] "LTBP4"    "TRIM38"   "VPS13A"   "ZBTB40"  
## $immune
##                                 category over_represented_pvalue
## 1977 gse22886 naive tcell vs monocyte up            1.476627e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 1977                0.9999989          7       64 0.07527844
## 
## [1] "set_30"
##  [1] "BACH2"  "CSRNP1" "EGR1"   "EPS8"   "FOSB"   "MYLIP"  "NOSIP"  "NR1D1" 
##  [9] "NR4A2"  "NR4A3"  "RCAN3"  "SELL"   "SNHG12" "TCF7"   "TXK"    "CYTH1" 
## [17] "DUS1L"  "JUND"   "MIDN"   "SCRN3"  "VCAN"  
## $reactome
##                                   category over_represented_pvalue
## 645 nuclear receptor transcription pathway            5.987482e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 645                0.9999994          3        6 0.06963441
## 
## $immune
##                                                             category
## 4345                       gse4984 lps vs vehicle ctrl treated dc up
## 3878 gse40666 stat1 ko vs stat4 ko cd8 tcell with ifna stim 90min dn
## 708                        gse15733 bm vs spleen memory cd4 tcell dn
## 3537            gse37605 foxp3 fusion gfp vs ires gfp treg c57bl6 up
## 5021                 nakaya pbmc fluarix fluvirin age 18 50yo 7dy dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4345            8.073533e-06                0.9999997          5       26
## 3878            3.607021e-05                0.9999984          5       26
## 708             5.836425e-05                0.9999970          5       33
## 3537            5.870079e-05                0.9999971          5       28
## 5021            8.787659e-05                0.9999917          7       76
##             FDR
## 4345 0.04115887
## 3878 0.07481416
## 708  0.07481416
## 3537 0.07481416
## 5021 0.08959897
## 
## [1] "set_31"
##  [1] "ASL"     "CLDND1"  "CXXC5"   "GATA3"   "PAPSS1"  "RAB33B"  "SESN1"  
##  [8] "SESN2"   "TBCCD1"  "TIGIT"   "CSNK1G2" "ERAP2"   "GALNT3"  "ITGAM"  
## [15] "KIR2DL3" "PYROXD1" "RAPGEF1" "RUFY2"   "SZT2"    "TBX21"   "VPS13B" 
## [22] "YPEL1"   "ZDHHC5" 
## $reactome
##                       category over_represented_pvalue under_represented_pvalue
## 47 amino acids regulate mtorc1            4.312411e-05                0.9999996
##    numDEInCat numInCat        FDR
## 47          3        6 0.05015334
## 
## [1] "set_32"
##  [1] "NT5E"    "TBC1D17" "AFF1"    "AFF4"    "ARHGAP4" "BAZ2A"   "CEP164" 
##  [8] "CRNKL1"  "LRRFIP1" "LUC7L3"  "NKTR"    "PNN"     "PPIG"    "PPIL2"  
## [15] "SRSF10"  "SRSF11"  "WDR60"   "YTHDC1" 
## $reactome
##                                            category over_represented_pvalue
## 579                                   mrna splicing            2.449222e-07
## 721 processing of capped intron containing pre mrna            8.173826e-07
##     under_represented_pvalue numDEInCat numInCat          FDR
## 579                        1          7       38 0.0002848445
## 721                        1          7       45 0.0004753080
## 
## $go_bp
##                                            category over_represented_pvalue
## 4255                                   rna splicing            3.815935e-07
## 4395                  spliceosomal complex assembly            1.192757e-05
## 1584                   mrna splice site recognition            7.284369e-05
## 4257 rna splicing via transesterification reactions            8.480068e-05
##      under_represented_pvalue numDEInCat numInCat         FDR
## 4255                1.0000000          8       72 0.001790055
## 4395                0.9999998          4       14 0.027976117
## 1584                0.9999991          3        8 0.099449994
## 4257                0.9999955          5       43 0.099449994
## 
## [1] "set_34"
##  [1] "GALNT11"  "ADGRG1"   "ARHGAP10" "CCL4"     "CES1"     "CX3CR1"  
##  [7] "ERBIN"    "FGFBP2"   "GALNT10"  "GALNT2"   "ITGAL"    "LAG3"    
## [13] "MPPE1"    "MYO9B"    "PEX1"     "PEX26"    "PLEK"     "SEMA4D"  
## [19] "ST8SIA4"  "SUSD1"    "TMEM8A"  
## $reactome
##                             category over_represented_pvalue
## 654 o linked glycosylation of mucins            3.406216e-05
## 653           o linked glycosylation            6.745152e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 654                0.9999998          3        5 0.03922306
## 653                0.9999993          3        6 0.03922306
## 
## $go_bp
##                 category over_represented_pvalue under_represented_pvalue
## 2230 o glycan processing            1.881402e-05                0.9999999
## 1031       glycosylation            1.935866e-05                0.9999996
##      numDEInCat numInCat        FDR
## 2230          3        5 0.04540574
## 1031          4       14 0.04540574
## 
## $immune
##                                                category over_represented_pvalue
## 4252 gse45739 unstim vs acd3 acd28 stim wt cd4 tcell dn            1.901911e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 4252                0.9999986          7       70 0.09695942
## 
## [1] "set_36"
##  [1] "AC025164.1" "AL121944.1" "AL138963.3" "ARMH1"      "C6orf48"   
##  [6] "CCR7"       "INPP4B"     "JAML"       "LINC02273"  "LRRN3"     
## [11] "LTB"        "NPIPB4"     "NUP58"      "TCEA3"      "TMIGD2"    
## [16] "TNFRSF25"   "TRABD2A"    "TRAV1-2"    "TRBV9"      "OXNAD1"    
## [21] "PARP11"     "PCED1B"    
## $immune
##                                        category over_represented_pvalue
## 2391     gse26495 naive vs pd1high cd8 tcell up            4.581605e-06
## 2393      gse26495 naive vs pd1low cd8 tcell up            8.116493e-06
## 142  gse11057 naive vs cent memory cd4 tcell up            5.570191e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 2391                0.9999999          5       30 0.02068894
## 2393                0.9999998          5       33 0.02068894
## 142                 0.9999984          4       25 0.09465612
## 
## [1] "set_39"
##  [1] "ABCC10"     "ARHGEF3"    "COL6A2"     "COL6A3"     "DDX60L"    
##  [6] "DENND4B"    "EIF4E3"     "HECA"       "HPS4"       "LAIR2"     
## [11] "N4BP1"      "NUTM2B-AS1" "ODF3B"      "SUSD6"      "TRAV12-3"  
## [16] "TRAV17"     "TRAV4"      "TRBV7-6"    "TRGV10"     "TSPAN32"   
## [21] "TTTY15"     "XCL1"       "ZBP1"      
## $reactome
##                                                         category
## 159                  collagen biosynthesis and modifying enzymes
## 160                                 collagen chain trimerization
## 72  assembly of collagen fibrils and other multimeric structures
## 162                                           collagen formation
## 590                                           ncam1 interactions
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 159            9.184822e-05                1.0000000          2        2
## 160            9.184822e-05                1.0000000          2        2
## 72             2.146121e-04                0.9999995          2        3
## 162            2.146121e-04                0.9999995          2        3
## 590            2.835424e-04                0.9999992          2        3
##            FDR
## 159 0.05340974
## 160 0.05340974
## 72  0.06239846
## 162 0.06239846
## 590 0.06595195
saveRDS(goseq_res, sprintf("output/gene_set_enrichments_%s.RDS", 
                           file_tag))

Session information

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8958444 478.5   16391124 875.4         NA 16391124 875.4
## Vcells 19169726 146.3   73326430 559.5      65536 91658037 699.3
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
##  [2] GenomicFeatures_1.50.4                  
##  [3] GenomicRanges_1.50.2                    
##  [4] GenomeInfoDb_1.34.9                     
##  [5] org.Hs.eg.db_3.16.0                     
##  [6] AnnotationDbi_1.60.2                    
##  [7] IRanges_2.32.0                          
##  [8] S4Vectors_0.36.2                        
##  [9] Biobase_2.58.0                          
## [10] BiocGenerics_0.44.0                     
## [11] goseq_1.50.0                            
## [12] geneLenDataBase_1.34.0                  
## [13] BiasedUrn_2.0.10                        
## [14] fgsea_1.24.0                            
## [15] biomaRt_2.54.1                          
## [16] limma_3.54.2                            
## [17] tidyr_1.3.0                             
## [18] ggpubr_0.6.0                            
## [19] ggplot2_3.4.2                           
## [20] data.table_1.14.8                       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162                matrixStats_1.0.0          
##  [3] bitops_1.0-7                bit64_4.0.5                
##  [5] filelock_1.0.2              progress_1.2.2             
##  [7] httr_1.4.6                  tools_4.2.3                
##  [9] backports_1.4.1             bslib_0.4.2                
## [11] utf8_1.2.3                  R6_2.5.1                   
## [13] mgcv_1.8-42                 DBI_1.1.3                  
## [15] colorspace_2.1-0            withr_2.5.0                
## [17] tidyselect_1.2.0            prettyunits_1.1.1          
## [19] bit_4.0.5                   curl_5.0.1                 
## [21] compiler_4.2.3              cli_3.6.1                  
## [23] xml2_1.3.4                  DelayedArray_0.24.0        
## [25] rtracklayer_1.58.0          sass_0.4.5                 
## [27] scales_1.2.1                rappdirs_0.3.3             
## [29] Rsamtools_2.14.0            stringr_1.5.0              
## [31] digest_0.6.31               rmarkdown_2.21             
## [33] XVector_0.38.0              pkgconfig_2.0.3            
## [35] htmltools_0.5.5             MatrixGenerics_1.10.0      
## [37] dbplyr_2.3.2                fastmap_1.1.1              
## [39] rlang_1.1.0                 rstudioapi_0.14            
## [41] RSQLite_2.3.1               BiocIO_1.8.0               
## [43] jquerylib_0.1.4             generics_0.1.3             
## [45] jsonlite_1.8.4              BiocParallel_1.32.6        
## [47] dplyr_1.1.2                 car_3.1-2                  
## [49] RCurl_1.98-1.12             magrittr_2.0.3             
## [51] GO.db_3.16.0                GenomeInfoDbData_1.2.9     
## [53] Matrix_1.6-4                Rcpp_1.0.10                
## [55] munsell_0.5.0               fansi_1.0.4                
## [57] abind_1.4-5                 lifecycle_1.0.3            
## [59] stringi_1.7.12              yaml_2.3.7                 
## [61] carData_3.0-5               SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0             BiocFileCache_2.6.1        
## [65] grid_4.2.3                  blob_1.2.4                 
## [67] parallel_4.2.3              crayon_1.5.2               
## [69] lattice_0.20-45             splines_4.2.3              
## [71] Biostrings_2.66.0           cowplot_1.1.1              
## [73] hms_1.1.3                   KEGGREST_1.38.0            
## [75] knitr_1.44                  pillar_1.9.0               
## [77] rjson_0.2.21                ggsignif_0.6.4             
## [79] codetools_0.2-19            fastmatch_1.1-3            
## [81] XML_3.99-0.14               glue_1.6.2                 
## [83] evaluate_0.20               png_0.1-8                  
## [85] vctrs_0.6.2                 gtable_0.3.3               
## [87] purrr_1.0.1                 cachem_1.0.7               
## [89] xfun_0.39                   broom_1.0.4                
## [91] restfulr_0.0.15             rstatix_0.7.2              
## [93] tibble_3.2.1                GenomicAlignments_1.34.1   
## [95] memoise_2.0.1